{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "# Import the scikit-learn submodules by name instead of `from sklearn import *`,\n",
    "# so every `metrics.` / `pipeline.` / `preprocessing.` reference has a visible origin.\n",
    "from sklearn import linear_model, metrics, model_selection, pipeline, preprocessing\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Time</th>\n",
       "      <th>V1</th>\n",
       "      <th>V2</th>\n",
       "      <th>V3</th>\n",
       "      <th>V4</th>\n",
       "      <th>V5</th>\n",
       "      <th>V6</th>\n",
       "      <th>V7</th>\n",
       "      <th>V8</th>\n",
       "      <th>V9</th>\n",
       "      <th>...</th>\n",
       "      <th>V21</th>\n",
       "      <th>V22</th>\n",
       "      <th>V23</th>\n",
       "      <th>V24</th>\n",
       "      <th>V25</th>\n",
       "      <th>V26</th>\n",
       "      <th>V27</th>\n",
       "      <th>V28</th>\n",
       "      <th>Amount</th>\n",
       "      <th>Class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.359807</td>\n",
       "      <td>-0.072781</td>\n",
       "      <td>2.536347</td>\n",
       "      <td>1.378155</td>\n",
       "      <td>-0.338321</td>\n",
       "      <td>0.462388</td>\n",
       "      <td>0.239599</td>\n",
       "      <td>0.098698</td>\n",
       "      <td>0.363787</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.018307</td>\n",
       "      <td>0.277838</td>\n",
       "      <td>-0.110474</td>\n",
       "      <td>0.066928</td>\n",
       "      <td>0.128539</td>\n",
       "      <td>-0.189115</td>\n",
       "      <td>0.133558</td>\n",
       "      <td>-0.021053</td>\n",
       "      <td>149.62</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.191857</td>\n",
       "      <td>0.266151</td>\n",
       "      <td>0.166480</td>\n",
       "      <td>0.448154</td>\n",
       "      <td>0.060018</td>\n",
       "      <td>-0.082361</td>\n",
       "      <td>-0.078803</td>\n",
       "      <td>0.085102</td>\n",
       "      <td>-0.255425</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.225775</td>\n",
       "      <td>-0.638672</td>\n",
       "      <td>0.101288</td>\n",
       "      <td>-0.339846</td>\n",
       "      <td>0.167170</td>\n",
       "      <td>0.125895</td>\n",
       "      <td>-0.008983</td>\n",
       "      <td>0.014724</td>\n",
       "      <td>2.69</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.358354</td>\n",
       "      <td>-1.340163</td>\n",
       "      <td>1.773209</td>\n",
       "      <td>0.379780</td>\n",
       "      <td>-0.503198</td>\n",
       "      <td>1.800499</td>\n",
       "      <td>0.791461</td>\n",
       "      <td>0.247676</td>\n",
       "      <td>-1.514654</td>\n",
       "      <td>...</td>\n",
       "      <td>0.247998</td>\n",
       "      <td>0.771679</td>\n",
       "      <td>0.909412</td>\n",
       "      <td>-0.689281</td>\n",
       "      <td>-0.327642</td>\n",
       "      <td>-0.139097</td>\n",
       "      <td>-0.055353</td>\n",
       "      <td>-0.059752</td>\n",
       "      <td>378.66</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-0.966272</td>\n",
       "      <td>-0.185226</td>\n",
       "      <td>1.792993</td>\n",
       "      <td>-0.863291</td>\n",
       "      <td>-0.010309</td>\n",
       "      <td>1.247203</td>\n",
       "      <td>0.237609</td>\n",
       "      <td>0.377436</td>\n",
       "      <td>-1.387024</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.108300</td>\n",
       "      <td>0.005274</td>\n",
       "      <td>-0.190321</td>\n",
       "      <td>-1.175575</td>\n",
       "      <td>0.647376</td>\n",
       "      <td>-0.221929</td>\n",
       "      <td>0.062723</td>\n",
       "      <td>0.061458</td>\n",
       "      <td>123.50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>-1.158233</td>\n",
       "      <td>0.877737</td>\n",
       "      <td>1.548718</td>\n",
       "      <td>0.403034</td>\n",
       "      <td>-0.407193</td>\n",
       "      <td>0.095921</td>\n",
       "      <td>0.592941</td>\n",
       "      <td>-0.270533</td>\n",
       "      <td>0.817739</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.009431</td>\n",
       "      <td>0.798278</td>\n",
       "      <td>-0.137458</td>\n",
       "      <td>0.141267</td>\n",
       "      <td>-0.206010</td>\n",
       "      <td>0.502292</td>\n",
       "      <td>0.219422</td>\n",
       "      <td>0.215153</td>\n",
       "      <td>69.99</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 31 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Time        V1        V2        V3        V4        V5        V6        V7  \\\n",
       "0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   \n",
       "1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   \n",
       "2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   \n",
       "3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   \n",
       "4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   \n",
       "\n",
       "         V8        V9  ...       V21       V22       V23       V24       V25  \\\n",
       "0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   \n",
       "1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   \n",
       "2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   \n",
       "3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   \n",
       "4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   \n",
       "\n",
       "        V26       V27       V28  Amount  Class  \n",
       "0 -0.189115  0.133558 -0.021053  149.62      0  \n",
       "1  0.125895 -0.008983  0.014724    2.69      0  \n",
       "2 -0.139097 -0.055353 -0.059752  378.66      0  \n",
       "3 -0.221929  0.062723  0.061458  123.50      0  \n",
       "4  0.502292  0.219422  0.215153   69.99      0  \n",
       "\n",
       "[5 rows x 31 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gzip-compressed CSV; read_csv infers the decompression from the .gz suffix.\n",
    "data_path = \"/data/creditcard-fraud.csv.gz\"\n",
    "df = pd.read_csv(data_path)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 284807 entries, 0 to 284806\n",
      "Data columns (total 31 columns):\n",
      "Time      284807 non-null float64\n",
      "V1        284807 non-null float64\n",
      "V2        284807 non-null float64\n",
      "V3        284807 non-null float64\n",
      "V4        284807 non-null float64\n",
      "V5        284807 non-null float64\n",
      "V6        284807 non-null float64\n",
      "V7        284807 non-null float64\n",
      "V8        284807 non-null float64\n",
      "V9        284807 non-null float64\n",
      "V10       284807 non-null float64\n",
      "V11       284807 non-null float64\n",
      "V12       284807 non-null float64\n",
      "V13       284807 non-null float64\n",
      "V14       284807 non-null float64\n",
      "V15       284807 non-null float64\n",
      "V16       284807 non-null float64\n",
      "V17       284807 non-null float64\n",
      "V18       284807 non-null float64\n",
      "V19       284807 non-null float64\n",
      "V20       284807 non-null float64\n",
      "V21       284807 non-null float64\n",
      "V22       284807 non-null float64\n",
      "V23       284807 non-null float64\n",
      "V24       284807 non-null float64\n",
      "V25       284807 non-null float64\n",
      "V26       284807 non-null float64\n",
      "V27       284807 non-null float64\n",
      "V28       284807 non-null float64\n",
      "Amount    284807 non-null float64\n",
      "Class     284807 non-null int64\n",
      "dtypes: float64(30), int64(1)\n",
      "memory usage: 67.4 MB\n"
     ]
    }
   ],
   "source": [
    "# Dtypes, non-null counts and memory footprint for every column.\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    284315\n",
       "1       492\n",
       "Name: Class, dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Absolute number of rows per class label.\n",
    "df[\"Class\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    0.998273\n",
       "1    0.001727\n",
       "Name: Class, dtype: float64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Share of each class label among all rows.\n",
    "class_counts = df[\"Class\"].value_counts()\n",
    "class_counts / len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "target = \"Class\"\n",
    "\n",
    "# Model inputs are the V1..V28 columns only; the label-based slice leaves\n",
    "# Time and Amount out of the feature matrix.\n",
    "X = df.loc[:, \"V1\":\"V28\"]\n",
    "y = df[target]\n",
    "features = X.columns\n",
    "\n",
    "# Positional hold-out without shuffling: the first 70% of the rows are used\n",
    "# for fitting, the remaining rows for evaluation.\n",
    "training_size = int(0.7 * len(df))\n",
    "X_train, X_test = X.values[:training_size, :], X.values[training_size:, :]\n",
    "y_train, y_test = y.values[:training_size], y.values[training_size:]\n",
    "\n",
    "# With degree=1 and no bias column the polynomial step passes the features\n",
    "# through unchanged.\n",
    "steps = [\n",
    "    (\"poly\", preprocessing.PolynomialFeatures(degree=1, include_bias=False)),\n",
    "    (\"scaler\", preprocessing.StandardScaler()),\n",
    "    (\"est\", linear_model.LogisticRegression(solver=\"liblinear\")),\n",
    "]\n",
    "pipe = pipeline.Pipeline(steps)\n",
    "pipe.fit(X_train, y_train)\n",
    "\n",
    "# Hard class labels for both partitions, reused by the evaluation cells.\n",
    "y_train_predict = pipe.predict(X_train)\n",
    "y_test_predict = pipe.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       1.00      1.00      1.00     85335\n",
      "           1       0.80      0.51      0.62       108\n",
      "\n",
      "    accuracy                           1.00     85443\n",
      "   macro avg       0.90      0.75      0.81     85443\n",
      "weighted avg       1.00      1.00      1.00     85443\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Per-class precision / recall / F1 on the held-out rows.\n",
    "test_report = metrics.classification_report(y_test, y_test_predict)\n",
    "print(test_report)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "confusion metrics: \n",
      " [[85321    14]\n",
      " [   53    55]]\n"
     ]
    }
   ],
   "source": [
    "# Rows are the true classes, columns the predicted classes.\n",
    "print(\"confusion matrix: \\n\", metrics.confusion_matrix(y_test, y_test_predict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TPR 0.5092592592592593\n",
      "FPR 0.00016405929571688052\n"
     ]
    }
   ],
   "source": [
    "# Derive the rates from the confusion matrix instead of retyping its counts,\n",
    "# so they stay correct when the data or the model changes.\n",
    "tn, fp, fn, tp = metrics.confusion_matrix(y_test, y_test_predict).ravel()\n",
    "print(\"TPR\", tp / (tp + fn))\n",
    "print(\"FPR\", fp / (fp + tn))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train preicision:  0.8489208633093526\n",
      "test preicision:  0.7971014492753623\n",
      "train recall:  0.6145833333333334\n",
      "test recall:  0.5092592592592593\n",
      "train f1:  0.7129909365558912\n",
      "test f1:  0.6214689265536724\n",
      "train accuracy:  0.999046969362573\n",
      "test accuracy:  0.9992158515033414\n"
     ]
    }
   ],
   "source": [
    "print(\"confusion matrix: \\n\", metrics.confusion_matrix(y_test, y_test_predict))\n",
    "\n",
    "# One train/test pair per metric, printed in the same order for each scorer.\n",
    "scorers = [\n",
    "    (\"precision\", metrics.precision_score),\n",
    "    (\"recall\", metrics.recall_score),\n",
    "    (\"f1\", metrics.f1_score),\n",
    "    (\"accuracy\", metrics.accuracy_score),\n",
    "]\n",
    "for name, scorer in scorers:\n",
    "    print(\"train {}: \".format(name), scorer(y_train, y_train_predict))\n",
    "    print(\"test {}: \".format(name), scorer(y_test, y_test_predict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5, 1.0, 'ROC 0.9707251811488057')"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAcuUlEQVR4nO3df7xVdZ3v8dcbEFEBNUBDQA8kpkij1hnLMsPUBq2HlFnJzZs1Ftlc7c7Uo67VHeth3alsmubRXMthyqwet8ysjArzcW9q/iiKY/5EpSH8wfEXKAjyQxD43D/W2rj2Zu3DxnPW3pzzfT8fj/1gr7W+a63POuew33t91y9FBGZmlq5hnS7AzMw6y0FgZpY4B4GZWeIcBGZmiXMQmJklzkFgZpY4B4GZWeIcBLaDpIclbZK0XtKTkq6SNLqhzesl3SjpOUlrJf1C0oyGNmMl/aukR/NlLcuHxzdZb5ekmyRtlPSgpFP7qHGSpJ9LWi2pV9IFhWlvzNdXfIWkdxba/EO+bWslXSlp73z8oU3m/Xg+/a2SbpP0bD7/f0gaU1juVZK2NMw/PJ82UtK1+c83JM1q2Ka9JV0h6al8u34haVJh+oWSeiRtlnRVw7x9Lruh3YOSehvGv1nSnyStk7Rc0ryG6RdJeiif3iPpxMI0SfqypGfy12WSVJgekjYUfh7fKky7vuFntUXSvaW/dKteRPjlFxEB8DBwav7+5cDdwP8qTD8BWA/8d2AM8DLgC8AaYFreZiSwGPi/wAyyLxsHAf8InNFkvb8H/gXYB3gn8CwwoUnbm4B/BfYCjgFWAyc3aTsLeA7YLx/+G+Ap4GjgQOBm4EtN5p0KbAO68uH/AswG9s3nvR64otD+KuALTZY1Evh74ETgCWBWw/RP5j/rg4FRwPeBnxamnwW8HfgmcNXuLLvQ7jPALUBvYdxewFrgw4CAv85/v8fk018LbABek0//CLAKGJ5P/zCwFJgMTALuBy4oLD+Aw1v827sZuKTT/wdSfXW8AL/2nFcxCPLhy4BfFYZvBb5RMt/1wPfy9x/MP2xHt7jOI4DNwJiG9VxQ0nZ0/uEyoTBuPvD9Jsv+DvCdwvAPgH8qDJ8CPNlk3s8CN/VR91nAvYXhpkHQMF9vSRB8E7isMPxWYGnJvF9oDIJdLTsfPxV4ADi9IQgOzn+e+xbGLQbm5u/fA/yxMG2/vP3EfPh3wLzC9POBRYXhloIA6CIL3amd/PtP+eWuISslaTLZB8eyfHhf4PXAj0uaXwOclr8/Ffh1RKxvcVVHA8sj4rnCuLvz8TuV1fBv7f3Mkvr3Bc4Gvtuwrrsb1nOwpHEl63pfw7yNTgKWNIz7u7xr545id1QLvg28QdIhed3vJQvXgfJvwKeBTcWREfEU8EPgA5KGSzoBOAy4LW9yPTBc0mvzbq6/Be4Cnsynl/08G39vt+RdaT+V1NWkvvcBt0bEQy9l46z/HATW6DpJzwErgJVk34wh6wYaRtb90OgJoNb/P65Jm2ZGk3VPFK0l63qqk4fF7cA/Shol6dVkXUn7liz3ncDTwG/7WFftfd26JL2R7NvytWUFSzoNOA+4pDD668B0XuwGu0rSG8rmL/Fn4FHgMWAdcBRwaYvz9knSO4AREfGzJk1+SLYdm8n2xD4TESvyac8BPyELhs1kfwvzIv8aT/nPc3ThOMGbyL7tHwk8DvxS0oiSGt5HtkdlHeIgsEZvj4gxZP3rR/LiB/waYDswsWSeiWQfugDPNGnTzHpgbMO4sWQfQmXeS9bVsYKsS+X/kHWJNDqPrLuqeFfFxnXV3jeu6zzgJ2V7NZJeR9bFdHZE/Lk2PiL+FBHPRMTWiFiY13VWk21o9E2yYwPjyLpffsoA7BFI2o+se++iJtOPBH5E9kE8kuzb/CclvTVv8kGyvYCj8+nnkn2YH5JPL/t5rq/9zCPilojYEhHPkh1XmkoWcsUa
TiQ7HlUautYeDgIrFRG/JfuW9s/58Aayg7rvKmn+buA3+fv/B/xN/iHUiiXAtOIZOGQHgRu7XWp1PRIRb4uICRHxWrIPzz8W20iaQhZk3ytZ1zEN63kqIp4pzLsP2Tbu1C0k6ThgAfC3EfGbxumNpVLfhdWXY8j6/ldHxGayrpzj1eQsq90wnewb+a2SniQLmIl5V00XWZfa0oi4ISK2R8RS4FdkXYK1un4REX/Op/+abG/v9fn0sp9n6e8tV/YzOY/swHirXYlWhU4fpPBrz3mx88HiCWRnjRybD5+YD3+UrDvlQLIDmM8C0/M2e5MdcPw12R7FMLIP60/T/KyhRWSBMwp4B32fNXRUvu7aN9SnG9vm67qlZN7ZZP3bM/Lab6ThrCGys4MeAdQwfibZQfD3NKnrbLKukmHAW8j2MmYVpu+db19vPn1UbR1kB7V/AuxPdibPp4HHCvOOyNt/keyMolFk3T19Ljuf7+WF11lkXTQvB4YDryD7Vv/mvP0ryI4JfShf7nlk3VbT8umnARuBI/PpF5AdhJ4EHEIWAhfk044Gjs3XM5rsTK+lwF6FuvfJf9dv7vTffuqvjhfg157zoiEI8nHfJOsmqQ2fSHaq33qy/uxfATMb5tk//4+/Im/3F7LTQ8c1WW9XvsxN+YdFMYzeCywpDP892SmMG8j6rrtLlvcgcH6TdX0s/0Bfl38A790w/Qbg8yXzfYesa2x94VWs61ayPvJ1ZAdNzyn52UbDqyufNo6sK2ll/sF4G3B8Yd7Plcz7uVaW3VDDLApnDeXj3g3cRxZcvcCXgWH5NJEdq3g0n/4A8F8L84qs62l1/rqMF8PtzfnvckO+XdeRf1kozD+XktD1q/2v2i/NzMwS5WMEZmaJcxCYmSXOQWBmljgHgZlZ4squ8tujjR8/Prq6ujpdhpnZoHLHHXc8HRETyqYNuiDo6uqip6en02WYmQ0qkh5pNs1dQ2ZmiXMQmJklzkFgZpY4B4GZWeIcBGZmiassCPIHg6+UdF+T6ZL0dWUPNr8nf8iImZm1WZV7BFeR3fa3mdPJ7pc+HZhHdpdLMzNrs8quI4iIW/p4RinAHF58gtQiSQdImhgRu/OYQzMbYA8+uY6F9/i/4Z7olKMO5pgpBwz4cjt5QdkksvvV1/Tm43b6C5Q0j2yvgUMPPbQtxZmlav5vl/PTOx9DrT5fzdrmoLGjhlwQlP2ZlT4cISLmA/MBuru7/QAFq8w1i1dw44MrO11GR93d+yxd4/bl5k+c3OlSrE06GQS9wJTC8GSyx+iZdcxVv3uY+59YxysPHrPrxkPU2FF7MeuVpbeksSGqk0GwALhQ0tXAa4G1Pj5grbjuzsf4+o3/2WT/sX9612zi1KMO4lvn/fXAL9xsD1VZEEj6IdkzUsdL6gU+S/ZgbiLiCmAhcAbZw7I3Ah+oqhYbGFu3be90CQDcvuxpHluzibcc/fIBX/bRk/bnHccdMuDLNduTVXnW0NxdTA/gv1W1fhtYX7r+Qa747V86XcYOkw7Yh3+be1ynyzAbEgbdbaitPR5+egMbt2zbMXz3imcZP3ok553Q1bmiCmZO3r/TJZgNGQ4C28mdj67hHd/43U7jj51yABedMr0DFZlZlRwEQ9Cyleu577G1L3n+B55YB8AnZ7+SaeNH7xh/1MR0z6QxG8ocBEPQJ6+9mz89+my/l3P6zIlMHb/fAFRkZnsyB8EQcf/j6/jlPdllGI+u3sgJ08bxT2e96iUvb7+9h3PQmFEDVZ6Z7cEcBEPEt297iJ/8qZe9hmcXbL/msAP9bd7MWuIgGAK+cfMybl/2NFNetg+3fvLNnS7HzAYZP5hmCLjytofZvHUbZ7xqYqdLMbNByHsEe6gvXv8A19/7ZEttV2/YzDnHH8qnTj+q4qrMbChyEOxBtmzdzvbIbqDz26Wr2LJ1Oye8Ytwu5+s+7EDe+erJVZdnZkOUg2APcW/vWs765u28sO3FO6nNPvrlfO09x3aw
KjNLgYOgQx59ZiPPbtqyY3jR8md4YVvw/td3cfDY7LTNk4/0rYDNrHoOgg5Yue55TvrKTaXTznt9l0/7NLO2chD0Q++ajSxavnq351v53PMAzDtpGq+d+rId4/ffZy+HgJm1nYOgH750/YP8sh8P+X7TERN4w+HjB7AiM7Pd5yBoYv3mrcy/ZTnPv7CtaZv7HlvLtAn78d0PHL/by997xDAOGutbOJhZ5zkImlj88Gq+/pv/ZOSIYQyXmrZ7619NZMrL9m1jZWZmA8tB0GD79uDTP7uXJY9nt2L+8YdP4JgpB3S4KjOz6vgWEw2e3rCZqxevYPWGLZx4+Hi6fPDWzIY47xHkVqzeyPu/80fWbtoKwEdmvYJzX3dYh6syM6teskEQEWwoPJN3yePr+MuqDRx+0GjecvTBnHzkQR2szsysfZINgk9cew/X3tG70/ivvusYHxMws6QkFwSrN2zhoac37Him72fOePGOnWNGjWDmpP07VZqZWUckFwTzvtdDzyNrgOymbh86aVqHKzIz66ykgiAi6HlkDa857EA+esp0Zh4yttMlmZl1XFJB8Oen1gMwaq9hvOkI39nTzAwSu45gwd2PAfC+E7o6W4iZ2R4kqSC47s7HAZjmi8TMzHZIKggee3YTbz/2EKYfPKbTpZiZ7TGSCYLInwW8fvPWDldiZrZnSSYIalcRzzjE1wmYmRVVGgSSZktaKmmZpItLph8q6SZJd0q6R9IZVdXy1LrsqWAjhze/pbSZWYoqCwJJw4HLgdOBGcBcSTMamv1P4JqIOA44B/hGVfXkPUN+doCZWYMq9wiOB5ZFxPKI2AJcDcxpaBNA7aqu/YHHqysnSwL18ZAZM7MUVRkEk4AVheHefFzR54BzJfUCC4GLyhYkaZ6kHkk9q1ateknF1PYIhjkHzMzqVBkEZR+50TA8F7gqIiYDZwDfl7RTTRExPyK6I6J7woSXdkXw9qgV5SQwMyuqMgh6gSmF4cns3PVzPnANQET8HhgFjK+imNjRNVTF0s3MBq8qg2AxMF3SVEkjyQ4GL2ho8yhwCoCko8iC4KX1/ezCc89n1w84B8zM6lUWBBGxFbgQuAF4gOzsoCWSLpV0Zt7s48CHJN0N/BB4f9Su/Bpgaze+UMVizcwGvUrvPhoRC8kOAhfHXVJ4fz/whiprqKl1CR1ywD7tWJ2Z2aCRzJXFNT5GYGZWL5kgqKbDycxs8EsnCPJ/ffqomVm9dIIgfPqomVmZZILAzMzKJRMEPkRgZlYunSCo3WLCXUNmZnWSCYIaHyw2M6uXUBC4c8jMrEwyQeCuITOzcskEQY2DwMysXjJB4I4hM7Ny6QSBH0xjZlYqnSDwg2nMzEolEwQ1zgEzs3rJBIHvPmpmVi6dIMj/ddeQmVm9ZIJg05at+TsngZlZUTJBoHxXYNt29xGZmRUlEwR7j8g2da/h3iMwMytKJgjMzKycg8DMLHEOAjOzxCUTBL6OwMysXDJBUCNfSGBmVie5IDAzs3oOAjOzxCUTBOEnEpiZlUomCGp8hMDMrF5yQWBmZvUqDQJJsyUtlbRM0sVN2rxb0v2Slkj6QZX1mJnZzkZUtWBJw4HLgdOAXmCxpAURcX+hzXTgU8AbImKNpIOqqsfMzMpVuUdwPLAsIpZHxBbgamBOQ5sPAZdHxBqAiFhZVTG+oMzMrFyVQTAJWFEY7s3HFR0BHCHpdkmLJM0uW5CkeZJ6JPWsWrWqX0X5ejIzs3pVBkHZR27j9/IRwHRgFjAX+JakA3aaKWJ+RHRHRPeECRMGvFAzs5RVGQS9wJTC8GTg8ZI2P4+IFyLiIWApWTCYmVmbVBkEi4HpkqZKGgmcAyxoaHMdcDKApPFkXUXLK6zJzMwaVBYEEbEVuBC4AXgAuCYilki6VNKZebMbgGck3Q/cBHwiIp6ppp4qlmpmNvhVdvooQEQsBBY2jLuk8D6Aj+WvtpCvLTYzq+Mri83MEucgMDNLnIPAzCxxyQSBjxWb
mZVLJghqfGWxmVm95ILAzMzq7XYQSBou6b1VFGNmZu3XNAgkjZX0KUn/W9JblLmI7Mrfd7evRDMzq1JfF5R9H1gD/B74IPAJYCQwJyLuakNtAyp8abGZWam+gmBaRLwKQNK3gKeBQyPiubZUZmZmbdHXMYIXam8iYhvwkEPAzGzo6WuP4BhJ63jxuQL7FIYjIsZWXp2ZmVWuaRBExPB2FlI1HyEwMyvXNAgkjQIuAA4H7gGuzG8tPaj5gjIzs3p9HSP4LtAN3AucAXy1LRWZmVlb9XWMYEbhrKFvA39sT0lmZtZOrZ41NOi7hMzMrFxfewTH5mcJQXam0OA+a8hHi83MSvUVBHdHxHFtq6RN5KPFZmZ1+uoa8ndoM7ME9LVHcJCkpg+Vj4h/qaAeMzNrs76CYDgwmhevLDYzsyGoryB4IiIubVslFQv3dJmZlerrGMGQ3BMYkhtlZtYPfQXBKW2rwszMOqZpEETE6nYWYmZmneGH15uZJS6ZIPCTKs3MyiUTBDW+sNjMrF5yQWBmZvUcBGZmias0CCTNlrRU0jJJF/fR7mxJIam7ynrMzGxnlQWBpOHA5cDpwAxgrqQZJe3GAB8F/lBVLeA76JmZNVPlHsHxwLKIWB4RW4CrgTkl7T4PXAY8X2EtO8jXFpuZ1akyCCYBKwrDvfm4HSQdB0yJiF/2tSBJ8yT1SOpZtWrVwFdqZpawKoOg7Kv3jh4aScOArwEf39WCImJ+RHRHRPeECRMGsEQzM6syCHqBKYXhycDjheExwEzgZkkPA68DFlR1wNgXlJmZlasyCBYD0yVNlTQSOAdYUJsYEWsjYnxEdEVEF7AIODMieiqsyReUmZk1qCwIImIrcCFwA/AAcE1ELJF0qaQzq1qvmZntnr4eTNNvEbEQWNgw7pImbWdVWYuZmZXzlcVmZolLJgj8qEozs3LJBEGNjxWbmdVLLgjMzKyeg8DMLHEOAjOzxCUTBL6y2MysXDJBsIOPFpuZ1UkvCMzMrI6DwMwscQ4CM7PEJRMEPlZsZlYumSCo8aMqzczqJRcEZmZWz0FgZpY4B4GZWeLSCQJfWmxmViqdIMj5mcVmZvWSCwIzM6vnIDAzS1wyQeAjBGZm5ZIJghofIjAzq5dcEJiZWT0HgZlZ4hwEZmaJSyYIfD2ZmVm5ZIKgRr6izMysTnJBYGZm9RwEZmaJcxCYmSWu0iCQNFvSUknLJF1cMv1jku6XdI+k30g6rKpawkeLzcxKVRYEkoYDlwOnAzOAuZJmNDS7E+iOiL8CrgUuq6qeHXVVvQIzs0Gmyj2C44FlEbE8IrYAVwNzig0i4qaI2JgPLgImV1iPmZmVqDIIJgErCsO9+bhmzgeuL5sgaZ6kHkk9q1atGsASzcysyiAo64Up7aiXdC7QDXylbHpEzI+I7ojonjBhwgCWaGZmIypcdi8wpTA8GXi8sZGkU4HPAG+KiM1VFeNDxWZm5arcI1gMTJc0VdJI4BxgQbGBpOOAfwfOjIiVFdZSWGc71mJmNnhUFgQRsRW4ELgBeAC4JiKWSLpU0pl5s68Ao4EfS7pL0oImizMzs4pU2TVERCwEFjaMu6Tw/tQq129mZrvmK4vNzBKXTBD4wmIzs3LJBEGNfG2xmVmd5ILAzMzqOQjMzBKXTBD4EIGZWblkgmAHHyIwM6uTXhCYmVkdB4GZWeIcBGZmiUsmCPyoSjOzcskEQY3vPmpmVi+5IDAzs3oOAjOzxDkIzMwS5yAwM0tcckHgY8VmZvWSCwIzM6vnIDAzS5yDwMwscckEgS8sNjMrl0wQ1MiXFpuZ1UkuCMzMrJ6DwMwscQ4CM7PEJRME4acWm5mVSiYIanyo2MysXnJBYGZm9RwEZmaJSyYIfEGZmVm5ZIKgxteTmZnVqzQIJM2WtFTSMkkXl0zfW9KP8ul/kNRVZT1mZrazyoJA0nDgcuB0YAYwV9KMhmbnA2si4nDga8CXq6rHzMzKVblHcDywLCKWR8QW4Gpg
TkObOcB38/fXAqfINwMyM2urKoNgErCiMNybjyttExFbgbXAuMYFSZonqUdSz6pVq15SMdMmjOatr5rIMOeMmVmdERUuu+wTt/HcnVbaEBHzgfkA3d3dL+n8n9NmHMxpMw5+KbOamQ1pVe4R9AJTCsOTgcebtZE0AtgfWF1hTWZm1qDKIFgMTJc0VdJI4BxgQUObBcB5+fuzgRsjfMa/mVk7VdY1FBFbJV0I3AAMB66MiCWSLgV6ImIB8G3g+5KWke0JnFNVPWZmVq7KYwRExEJgYcO4SwrvnwfeVWUNZmbWt+SuLDYzs3oOAjOzxDkIzMwS5yAwM0ucBtvZmpJWAY+8xNnHA08PYDmDgbc5Dd7mNPRnmw+LiAllEwZdEPSHpJ6I6O50He3kbU6DtzkNVW2zu4bMzBLnIDAzS1xqQTC/0wV0gLc5Dd7mNFSyzUkdIzAzs52ltkdgZmYNHARmZokbkkEgabakpZKWSbq4ZPrekn6UT/+DpK72VzmwWtjmj0m6X9I9kn4j6bBO1DmQdrXNhXZnSwpJg/5Uw1a2WdK789/1Ekk/aHeNA62Fv+1DJd0k6c787/uMTtQ5UCRdKWmlpPuaTJekr+c/j3skvbrfK42IIfUiu+X1X4BpwEjgbmBGQ5u/A67I358D/KjTdbdhm08G9s3ffySFbc7bjQFuARYB3Z2uuw2/5+nAncCB+fBBna67Dds8H/hI/n4G8HCn6+7nNp8EvBq4r8n0M4DryZ7w+DrgD/1d51DcIzgeWBYRyyNiC3A1MKehzRzgu/n7a4FTpEH9MONdbnNE3BQRG/PBRWRPjBvMWvk9A3weuAx4vp3FVaSVbf4QcHlErAGIiJVtrnGgtbLNAYzN3+/Pzk9CHFQi4hb6flLjHOB7kVkEHCBpYn/WORSDYBKwojDcm48rbRMRW4G1wLi2VFeNVra56HyybxSD2S63WdJxwJSI+GU7C6tQK7/nI4AjJN0uaZGk2W2rrhqtbPPngHMl9ZI9/+Si9pTWMbv7/32XKn0wTYeUfbNvPEe2lTaDScvbI+lcoBt4U6UVVa/PbZY0DPga8P52FdQGrfyeR5B1D80i2+u7VdLMiHi24tqq0so2zwWuioivSjqB7KmHMyNie/XldcSAf34NxT2CXmBKYXgyO+8q7mgjaQTZ7mRfu2J7ula2GUmnAp8BzoyIzW2qrSq72uYxwEzgZkkPk/WlLhjkB4xb/dv+eUS8EBEPAUvJgmGwamWbzweuAYiI3wOjyG7ONlS19P99dwzFIFgMTJc0VdJIsoPBCxraLADOy9+fDdwY+VGYQWqX25x3k/w7WQgM9n5j2MU2R8TaiBgfEV0R0UV2XOTMiOjpTLkDopW/7evITgxA0niyrqLlba1yYLWyzY8CpwBIOoosCFa1tcr2WgC8Lz976HXA2oh4oj8LHHJdQxGxVdKFwA1kZxxcGRFLJF0K9ETEAuDbZLuPy8j2BM7pXMX91+I2fwUYDfw4Py7+aESc2bGi+6nFbR5SWtzmG4C3SLof2AZ8IiKe6VzV/dPiNn8c+A9J/0DWRfL+wfzFTtIPybr2xufHPT4L7AUQEVeQHQc5A1gGbAQ+0O91DuKfl5mZDYCh2DVkZma7wUFgZpY4B4GZWeIcBGZmiXMQmJklzkFg1iJJ2yTdVXh1SZolaW1+58sHJH02b1sc/6Ckf+50/WbNDLnrCMwqtCkiji2OyG9hfmtEvE3SfsBdkmr3NqqN3we4U9LPIuL29pZstmveIzAbIBGxAbgDeEXD+E3AXfTzxmBmVXEQmLVun0K30M8aJ0oaR3ZPoyUN4w8ku9/PLe0p02z3uGvIrHU7dQ3l3ijpTmA78KX8Fgiz8vH3AK/Mxz/ZxlrNWuYgMOu/WyPibc3GSzoCuC0/RnBXu4sz2xV3DZlVLCL+DHwR+B+drsWsjIPArD2uAE6SNLXThZg18t1HzcwS5z0CM7PEOQjMzBLnIDAzS5yDwMwscQ4CM7PEOQjM
zBLnIDAzS9z/ByKTUrIfFq/dAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Column 1 of predict_proba holds the score for class 1.\n",
    "y_test_prob = pipe.predict_proba(X_test)[:, 1]\n",
    "fpr, tpr, thresholds = metrics.roc_curve(y_test, y_test_prob)\n",
    "auc = metrics.auc(fpr, tpr)\n",
    "\n",
    "plt.plot(fpr, tpr)\n",
    "plt.xlabel(\"FPR\")\n",
    "plt.ylabel(\"TPR\")\n",
    "# Single title call (the earlier placeholder title was always overwritten);\n",
    "# the trailing semicolon keeps the Text(...) repr out of the cell output.\n",
    "plt.title(\"ROC \" + str(auc));"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9707251811488057"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# AUC computed directly from the class-1 scores.\n",
    "metrics.roc_auc_score(y_true=y_test, y_score=y_test_prob)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Sum of transaction amounts in the true-negative (TN) bucket of the test dataset, i.e. rows with `Class == 0` that the model also predicted as 0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7224977.579999999"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Amount column restricted to the test rows, then masked to true negatives.\n",
    "test_amounts = df[\"Amount\"].values[training_size:]\n",
    "tn_mask = (y_test == 0) & (y_test_predict == 0)\n",
    "test_amounts[tn_mask].sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Sum of transaction amounts in the false-negative (FN) bucket of the test dataset, i.e. rows with `Class == 1` that the model predicted as 0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8336.050000000001"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Amount column restricted to the test rows, then masked to false negatives.\n",
    "test_amounts = df[\"Amount\"].values[training_size:]\n",
    "fn_mask = (y_test == 1) & (y_test_predict == 0)\n",
    "test_amounts[fn_mask].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.11537821270304895"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# False-negative amount as a percentage of the true-negative amount on the\n",
    "# test rows, recomputed from the data instead of retyping earlier outputs.\n",
    "test_amounts = df[\"Amount\"].values[training_size:]\n",
    "fn_amount = test_amounts[(y_test == 1) & (y_test_predict == 0)].sum()\n",
    "tn_amount = test_amounts[(y_test == 0) & (y_test_predict == 0)].sum()\n",
    "100 * fn_amount / tn_amount"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
